today <- Sys.Date() # today's date

Mise à jour du 2021-03-09. Source des données : https://www.data.gouv.fr/fr/datasets/donnees-de-laboratoires-pour-le-depistage-indicateurs-sur-les-variants/

Légende

library("RColorBrewer")
brw <- brewer.pal(12, "Set3")
brw[2] <- brw[12] # Darker yellow
brw[10] <- brw[11] # Lighter shade
colsAge <- c("#000000FF", brw[1:10])
names(colsAge) <- c("0", "9", "19", "29", "39", "49", "59", "69", "79", "89", "90")
pchAge <- c(16, 0:9)
names(pchAge) <- names(colsAge)
cexAge <- c(1.2, rep(1, 10))
names(cexAge) <- names(colsAge)

ages <- c("tous", "0-9", "10-19", "20-29", "30-39", "40-49", "50-59", "60-69", "70-79", "80-89", "90+")
par(mfrow = c(1, 1))
plot(0:1, 0:1, type = "n", axes = FALSE, xlab = "", ylab = "")
legend(x = 0.5, y = 1, legend = ages, pch = pchAge, col = colsAge)

Données France

Load data

# Données France
URL <- "https://www.data.gouv.fr/fr/datasets/r/c43d7f3f-c9f5-436b-9b26-728f80e0fd52"
dataFile <- paste0("data/France_", today, ".csv") # name file with today's date
download.file(URL, dataFile) # download file from repo
dat.France <- read.csv(dataFile, sep = ";", stringsAsFactors = FALSE)


# Format date
dat.France$date1 <- as.Date(substring(dat.France$semaine, 1, 10))
dat.France$date2 <- as.Date(substring(dat.France$semaine, 12, 21))

# Compute data on total tests
dat.France$Nb_tests <- dat.France$Nb_tests_PCR_TA_crible / (dat.France$Prc_tests_PCR_TA_crible / 100)
# Compare to another source
URL <- "https://www.data.gouv.fr/fr/datasets/r/dd0de5d9-b5a5-4503-930a-7b08dc0adc7c"
dataFile <- paste0("data/tests-France_", today, ".csv") # name file with today's date
download.file(URL, dataFile) # download file from repo
dat.tests <- read.csv(dataFile, sep = ";", stringsAsFactors = FALSE)


URL <- "https://www.data.gouv.fr/fr/datasets/r/c1167c4e-8c89-40f2-adb3-1954f8fedfa7"
dataFile <- paste0("data/tests7j-France_", today, ".csv") # name file with today's date
download.file(URL, dataFile) # download file from repo
dat.tests7j <- read.csv(dataFile, sep = ";", stringsAsFactors = FALSE)
dat.tests7j$date1 <- as.Date(substring(dat.tests7j$semaine_glissante, 1, 10))
dat.tests7j$date2 <- as.Date(substring(dat.tests7j$semaine_glissante, 12, 21))

# P nombre de tests positifs
# T nombre de tests total

dat.tests$date <- as.Date(dat.tests$jour)
dateRange <- range(c(range(dat.France$date1), range(dat.France$date2)))

subdat.tests <- dat.tests[dat.tests$date >= dateRange[1] & dat.tests$date <= dateRange[2],]

# Function to compute a sliding window 
sliding.window <- function(v, winwdt = 7, pos = 4, na.rm = TRUE){
  # v vector to be averaged/summed
  # winwdt width of the window 
  # pos position of the focal day in the window
  # FUN function to apply
  n <- length(v)
  # Initialize output vector
  out <- 0 * v + (-1)
  out[1:(pos-1)] <- NA
  out[(n + 1 - winwdt + pos) : n] <- NA
  
  for(i in pos : (n - winwdt + pos)){
    out[i] <- mean(v[(i - pos + 1):(i + winwdt - pos)], na.rm = na.rm)
  }
  return(out[1:n])
}

subdat.tests.0 <- subdat.tests[subdat.tests$cl_age90 == 0, ]
dat.France.0 <- dat.France[dat.France$cl_age90 == 0, ]

subdat.tests.0$P.7.1 <- 7 * sliding.window(subdat.tests.0$P, pos = 1)
subdat.tests.0$P.7.4 <- 7 * sliding.window(subdat.tests.0$P, pos = 4)
subdat.tests.0$P.7.7 <- 7 * sliding.window(subdat.tests.0$P, pos = 7)
subdat.tests.0$P.8.8 <- 8 * sliding.window(subdat.tests.0$P, pos = 8, winwdt = 8)

plot(dat.France.0$date2, dat.France.0$Nb_tests, ylim = c(1*10^5, 2*10^5), pch = 16, 
     xlab = "date", ylab = "nombre de tests")
points(subdat.tests.0$date, subdat.tests.0$P.7.1, ylim = c(0, 5*10^5), col = "red")
points(subdat.tests.0$date, subdat.tests.0$P.7.4, ylim = c(0, 5*10^5), col = "green")
points(subdat.tests.0$date, subdat.tests.0$P.7.7, ylim = c(0, 5*10^5), col = "blue")
points(subdat.tests.0$date, subdat.tests.0$P.8.8, ylim = c(0, 5*10^5), col = "purple")
points(dat.tests7j$date2, dat.tests7j$P, pch = 2)
legend(x = as.Date("2021-02-18"), y = 200000, col = c("black", "red", "green", "blue", "purple", "black"), legend = c("sidep tests", "w7, c1", "w7, c4", "w7, c7", "w8, c8", "sidep 7j-fin"), pch = c(16, rep(1, 4), 2))

Legend notation:
w: width of the window, c: position of the index day.
So the sliding window is on the 7 last days. The difference may be due to the variant data being in terms of tests, and the other in terms of people, with duplicates removed.

#plot(dat.France.0$date2, dat.France.0$Nb_tests, ylim = c(1*10^5, 2*10^5), pch = 16, 
#     xlab = "date", ylab = "comparaison nombre de tests")
#points(subdat.tests.0$date, subdat.tests.0$P.7.7, ylim = c(0, 5*10^5), col = "blue")
#points(subdat.tests.0$date, 1.17*subdat.tests.0$P.7.7, ylim = c(0, 5*10^5), col = "orange")


dat.France.ages <- dat.France[dat.France$cl_age90 != 0,]

V1

Plot

# All ages, time
par(las = 1)
plot(dat.France$date2, dat.France$Prc_susp_501Y_V1, ylim = c(0, 100), 
     col = colsAge[as.character(dat.France$cl_age90)], 
     pch = pchAge[as.character(dat.France$cl_age90)], 
     cex = cexAge[as.character(dat.France$cl_age90)], 
     xlab = "date", ylab = "Proportion V1", axes = FALSE)
axis(1, pos = 0, at = as.Date(unique(dat.France$date2)), labels = format(unique(dat.France$date2), format = "%b %d"))
axis(2)

ageClasses <- sort(unique(dat.France$cl_age90))
nAge <- length(ageClasses)

for(iage in unique(dat.France$cl_age90)){
  sub <- dat.France[dat.France$cl_age90 == iage, ]
  V1 <- sub$Prc_susp_501Y_V1/100
  t <- sub$date2
  s <- diff(V1) / (V1[-length(V1)]*(1-V1[-length(V1)]))
  plot(t[-length(V1)], s, ylim = c(-0.1, 0.1),  main = iage)
  print(c(iage, mean(s)))
}

Test

Model over time

# Create new colums with information on number of specific PCR tests
# PCR with V1 result
dat.France.ages$V1 <- dat.France.ages$Nb_susp_501Y_V1
# All other PCRs (considering NAs are non-V1)
dat.France.ages$notV1 <- dat.France.ages$Nb_tests_PCR_TA_crible - dat.France.ages$Nb_susp_501Y_V1
# All other PCRs with a result (removing NAs)
dat.France.ages$notV1.narm <- dat.France.ages$Nb_susp_501Y_V2_3 + dat.France.ages$Nb_susp_ABS

# Check that columns currently sum
all(dat.France.ages$Nb_susp_501Y_V2_3 + dat.France.ages$Nb_susp_ABS + dat.France.ages$Nb_susp_501Y_V1 + dat.France.ages$Nb_susp_IND - dat.France.ages$Nb_tests_PCR_TA_crible == 0)
## [1] TRUE
# GLM
# Assuming that all IND (indetermine) are non-V1
mdl <- glm(cbind(V1, notV1) ~ date2 + cl_age90, data = dat.France.ages, family = "binomial")
summary(mdl)
## 
## Call:
## glm(formula = cbind(V1, notV1) ~ date2 + cl_age90, family = "binomial", 
##     data = dat.France.ages)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -14.015   -5.660   -1.434    3.372   12.882  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -9.861e+02  6.218e+00 -158.58   <2e-16 ***
## date2        5.281e-02  3.328e-04  158.67   <2e-16 ***
## cl_age90    -7.052e-03  7.232e-05  -97.52   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 41863.9  on 169  degrees of freedom
## Residual deviance:  6080.8  on 167  degrees of freedom
## AIC: 7687.6
## 
## Number of Fisher Scoring iterations: 3
# Ignoring IND
mdl.narm <- glm(cbind(V1, notV1.narm) ~ date2 + cl_age90, data = dat.France.ages, family = "binomial")
summary(mdl.narm)
## 
## Call:
## glm(formula = cbind(V1, notV1.narm) ~ date2 + cl_age90, family = "binomial", 
##     data = dat.France.ages)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -13.1530   -5.1441   -0.9887    3.0369   12.6468  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -1.102e+03  6.635e+00 -166.09   <2e-16 ***
## date2        5.903e-02  3.551e-04  166.21   <2e-16 ***
## cl_age90    -7.666e-03  7.721e-05  -99.28   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 44062.4  on 169  degrees of freedom
## Residual deviance:  5369.5  on 167  degrees of freedom
## AIC: 6955.3
## 
## Number of Fisher Scoring iterations: 3

V2/V3

Plot

# All ages, time
par(las = 1)
plot(dat.France$date2, dat.France$Prc_susp_501Y_V2_3, ylim = c(0, 100), 
     col = colsAge[as.character(dat.France$cl_age90)], 
     pch = pchAge[as.character(dat.France$cl_age90)], 
     cex = cexAge[as.character(dat.France$cl_age90)], 
     xlab = "date", ylab = "Proportion V2/V3", axes = FALSE)
axis(1, pos = 0, at = as.Date(unique(dat.France$date2)), labels = format(unique(dat.France$date2), format = "%b %d"))
axis(2)

Test

# Create new colums with information on number of specific PCR tests
# PCR with V2/V3 result
dat.France.ages$V23 <- dat.France.ages$Nb_susp_501Y_V2_3
# All other PCRs (considering NAs are non-V23)
dat.France.ages$notV23 <- dat.France.ages$Nb_tests_PCR_TA_crible - dat.France.ages$Nb_susp_501Y_V2_3
# All other PCRs with a result (removing NAs)
dat.France.ages$notV23.narm <- dat.France.ages$Nb_susp_501Y_V1 + dat.France.ages$Nb_susp_ABS


# GLM
# Assuming that all IND (indetermine) are non-V1
mdl <- glm(cbind(V23, notV23) ~ date2 + cl_age90, data = dat.France.ages, family = "binomial")
summary(mdl)
## 
## Call:
## glm(formula = cbind(V23, notV23) ~ date2 + cl_age90, family = "binomial", 
##     data = dat.France.ages)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -5.1093  -2.4440  -0.9282   1.5813   4.9755  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -2.019e+02  1.397e+01  -14.46   <2e-16 ***
## date2        1.066e-02  7.475e-04   14.26   <2e-16 ***
## cl_age90    -3.127e-03  1.644e-04  -19.02   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1646.3  on 169  degrees of freedom
## Residual deviance: 1064.3  on 167  degrees of freedom
## AIC: 2383.7
## 
## Number of Fisher Scoring iterations: 4
# Ignoring IND
mdl.narm <- glm(cbind(V23, notV23.narm) ~ date2 + cl_age90, data = dat.France.ages, family = "binomial")
summary(mdl.narm)
## 
## Call:
## glm(formula = cbind(V23, notV23.narm) ~ date2 + cl_age90, family = "binomial", 
##     data = dat.France.ages)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -4.7362  -2.2958  -0.8464   1.5011   4.9508  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -1.964e+02  1.400e+01  -14.03   <2e-16 ***
## date2        1.037e-02  7.495e-04   13.83   <2e-16 ***
## cl_age90    -3.006e-03  1.654e-04  -18.18   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1503.92  on 169  degrees of freedom
## Residual deviance:  967.19  on 167  degrees of freedom
## AIC: 2285.7
## 
## Number of Fisher Scoring iterations: 4

Données régions

URL <- "https://www.data.gouv.fr/fr/datasets/r/73e8851a-d851-43f8-89e4-6178b35b7127"
dataFile <- paste0("data/Regions_", today, ".csv") # name file with today's date
download.file(URL, dataFile) # download file from repo
dat.regions <- read.csv(dataFile, sep = ";", stringsAsFactors = FALSE)

# Format date
dat.regions$date1 <- as.Date(substring(dat.regions$semaine, 1, 10))
dat.regions$date2 <- as.Date(substring(dat.regions$semaine, 12, 21))
# Codes regions
URL <- "https://www.data.gouv.fr/en/datasets/r/34fc7b52-ef11-4ab0-bc16-e1aae5c942e7"
dataFile <- "data/coderegions.csv"
download.file(URL, dataFile)
codesRegions <- read.csv(dataFile, sep = ",", stringsAsFactors = FALSE)

# Turn into dictionary
regs <- codesRegions$nom_region
names(regs) <- as.character(codesRegions$code_region)
# Add region name
dat.regions$reg_name <- regs[as.character(dat.regions$reg)]
# dat.regions[floor(runif(10)*1000), c("reg", "reg_name")] # check a few names

# What are the other regions??
# aggregate(dat.regions$reg, by = list(dat.regions$reg), FUN = length)
dat.regions.ages <- dat.regions[dat.regions$cl_age90 != 0,]

V1

Plot

tmp <- unique(dat.regions$reg) # Region codes
tmp <- tmp[tmp>10 & tmp <= 93] # Choose only metropolitan regions
par(mfrow = c(4, 3))
for(region in tmp){
  subdat <- dat.regions[dat.regions$reg == region, ]
  plot(subdat$date2, subdat$Prc_susp_501Y_V1, ylim = c(0, 100), main = regs[as.character(region)], col = colsAge[as.character(subdat$cl_age90)], pch = pchAge[as.character(subdat$cl_age90)], 
       xlab = "date", ylab = "Proportion V1"
       )
}

Test

# Create new colums with information on number of specific PCR tests
# PCR with V1 result
dat.regions.ages$V1 <- dat.regions.ages$Nb_susp_501Y_V1
# All other PCRs (considering NAs are non-V1)
dat.regions.ages$notV1 <- dat.regions.ages$Nb_tests_PCR_TA_crible - dat.regions.ages$Nb_susp_501Y_V1
# All other PCRs with a result (removing NAs)
dat.regions.ages$notV1.narm <- dat.regions.ages$Nb_susp_501Y_V2_3 + dat.regions.ages$Nb_susp_ABS

# Check that columns currently sum
all(dat.regions.ages$Nb_susp_501Y_V2_3 + dat.regions.ages$Nb_susp_ABS + dat.regions.ages$Nb_susp_501Y_V1 + dat.regions.ages$Nb_susp_IND - dat.regions.ages$Nb_tests_PCR_TA_crible == 0)
## [1] TRUE
dat.regions.ages$reg_name.fac <- as.factor(dat.regions.ages$reg_name)

# GLM
# Assuming that all IND (indetermine) are non-V1
mdl <- glm(cbind(V1, notV1) ~ date2 + cl_age90 + reg_name.fac, data = dat.regions.ages, family = "binomial")
summary(mdl)
## 
## Call:
## glm(formula = cbind(V1, notV1) ~ date2 + cl_age90 + reg_name.fac, 
##     family = "binomial", data = dat.regions.ages)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -8.5196  -1.2720  -0.0263   0.9814  10.0068  
## 
## Coefficients:
##                                          Estimate Std. Error  z value Pr(>|z|)
## (Intercept)                            -1.018e+03  6.332e+00 -160.732  < 2e-16
## date2                                   5.449e-02  3.389e-04  160.782  < 2e-16
## cl_age90                               -6.954e-03  7.363e-05  -94.434  < 2e-16
## reg_name.facBourgogne-Franche-Comté    -4.424e-01  9.849e-03  -44.917  < 2e-16
## reg_name.facBretagne                    5.461e-01  1.066e-02   51.210  < 2e-16
## reg_name.facCentre-Val de Loire         1.026e-01  1.049e-02    9.782  < 2e-16
## reg_name.facCorse                       8.699e-01  3.995e-02   21.774  < 2e-16
## reg_name.facGrand Est                  -4.762e-01  7.535e-03  -63.198  < 2e-16
## reg_name.facGuadeloupe                  1.503e+00  7.091e-02   21.198  < 2e-16
## reg_name.facGuyane                     -5.714e-01  1.396e-01   -4.093 4.25e-05
## reg_name.facHauts-de-France             5.066e-01  6.294e-03   80.497  < 2e-16
## reg_name.facÃŽle-de-France               4.874e-01  5.783e-03   84.281  < 2e-16
## reg_name.facLa Réunion                 -2.673e+00  4.384e-02  -60.962  < 2e-16
## reg_name.facMartinique                  5.291e-01  6.326e-02    8.364  < 2e-16
## reg_name.facMayotte                    -4.892e+00  2.048e-01  -23.890  < 2e-16
## reg_name.facNormandie                   1.357e-01  9.218e-03   14.725  < 2e-16
## reg_name.facNouvelle-Aquitaine          4.314e-03  8.716e-03    0.495 0.620646
## reg_name.facOccitanie                   3.120e-01  7.572e-03   41.204  < 2e-16
## reg_name.facPays de la Loire           -3.233e-02  8.790e-03   -3.678 0.000235
## reg_name.facProvence-Alpes-Côte d'Azur  4.445e-01  6.656e-03   66.792  < 2e-16
##                                           
## (Intercept)                            ***
## date2                                  ***
## cl_age90                               ***
## reg_name.facBourgogne-Franche-Comté    ***
## reg_name.facBretagne                   ***
## reg_name.facCentre-Val de Loire        ***
## reg_name.facCorse                      ***
## reg_name.facGrand Est                  ***
## reg_name.facGuadeloupe                 ***
## reg_name.facGuyane                     ***
## reg_name.facHauts-de-France            ***
## reg_name.facÃŽle-de-France              ***
## reg_name.facLa Réunion                 ***
## reg_name.facMartinique                 ***
## reg_name.facMayotte                    ***
## reg_name.facNormandie                  ***
## reg_name.facNouvelle-Aquitaine            
## reg_name.facOccitanie                  ***
## reg_name.facPays de la Loire           ***
## reg_name.facProvence-Alpes-Côte d'Azur ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 103374  on 2874  degrees of freedom
## Residual deviance:  12153  on 2855  degrees of freedom
##   (510 observations deleted due to missingness)
## AIC: 27172
## 
## Number of Fisher Scoring iterations: 5
# Ignoring IND
mdl.narm <- glm(cbind(V1, notV1.narm) ~ date2 + cl_age90 + reg_name.fac, data = dat.regions.ages, family = "binomial")
summary(mdl.narm)
## 
## Call:
## glm(formula = cbind(V1, notV1.narm) ~ date2 + cl_age90 + reg_name.fac, 
##     family = "binomial", data = dat.regions.ages)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -7.7191  -1.1813   0.0000   0.9934   8.1789  
## 
## Coefficients:
##                                          Estimate Std. Error  z value Pr(>|z|)
## (Intercept)                            -1.140e+03  6.797e+00 -167.653  < 2e-16
## date2                                   6.102e-02  3.638e-04  167.723  < 2e-16
## cl_age90                               -7.544e-03  7.899e-05  -95.511  < 2e-16
## reg_name.facBourgogne-Franche-Comté    -4.909e-01  1.010e-02  -48.620  < 2e-16
## reg_name.facBretagne                    5.698e-01  1.122e-02   50.802  < 2e-16
## reg_name.facCentre-Val de Loire         2.218e-01  1.119e-02   19.824  < 2e-16
## reg_name.facCorse                       1.131e+00  4.614e-02   24.519  < 2e-16
## reg_name.facGrand Est                  -4.672e-01  7.809e-03  -59.833  < 2e-16
## reg_name.facGuadeloupe                  1.464e+00  7.423e-02   19.730  < 2e-16
## reg_name.facGuyane                     -4.394e-01  1.461e-01   -3.008  0.00263
## reg_name.facHauts-de-France             6.200e-01  6.666e-03   93.007  < 2e-16
## reg_name.facÃŽle-de-France               6.858e-01  6.156e-03  111.405  < 2e-16
## reg_name.facLa Réunion                 -2.617e+00  4.424e-02  -59.143  < 2e-16
## reg_name.facMartinique                  4.751e-01  6.516e-02    7.291 3.07e-13
## reg_name.facMayotte                    -4.908e+00  2.049e-01  -23.948  < 2e-16
## reg_name.facNormandie                   2.183e-01  9.762e-03   22.362  < 2e-16
## reg_name.facNouvelle-Aquitaine          5.161e-03  9.060e-03    0.570  0.56896
## reg_name.facOccitanie                   3.660e-01  7.965e-03   45.956  < 2e-16
## reg_name.facPays de la Loire           -4.888e-02  9.100e-03   -5.372 7.78e-08
## reg_name.facProvence-Alpes-Côte d'Azur  5.583e-01  7.059e-03   79.086  < 2e-16
##                                           
## (Intercept)                            ***
## date2                                  ***
## cl_age90                               ***
## reg_name.facBourgogne-Franche-Comté    ***
## reg_name.facBretagne                   ***
## reg_name.facCentre-Val de Loire        ***
## reg_name.facCorse                      ***
## reg_name.facGrand Est                  ***
## reg_name.facGuadeloupe                 ***
## reg_name.facGuyane                     ** 
## reg_name.facHauts-de-France            ***
## reg_name.facÃŽle-de-France              ***
## reg_name.facLa Réunion                 ***
## reg_name.facMartinique                 ***
## reg_name.facMayotte                    ***
## reg_name.facNormandie                  ***
## reg_name.facNouvelle-Aquitaine            
## reg_name.facOccitanie                  ***
## reg_name.facPays de la Loire           ***
## reg_name.facProvence-Alpes-Côte d'Azur ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 115520  on 2844  degrees of freedom
## Residual deviance:  10258  on 2825  degrees of freedom
##   (510 observations deleted due to missingness)
## AIC: 24970
## 
## Number of Fisher Scoring iterations: 5

V2

par(mfrow = c(4, 3))
for(region in tmp){
  subdat <- dat.regions[dat.regions$reg == region, ]
  plot(subdat$date2, subdat$Prc_susp_501Y_V2_3, ylim = c(0, 100), main = regs[as.character(region)], col = colsAge[as.character(subdat$cl_age90)], pch = pchAge[as.character(subdat$cl_age90)], 
       xlab = "date", ylab = "Proportion V2/V3"
       )
}

# Create new colums with information on number of specific PCR tests
# PCR with V2/3 result
dat.regions.ages$V23 <- dat.regions.ages$Nb_susp_501Y_V2_3
# All other PCRs (considering NAs are non-V1)
dat.regions.ages$notV23 <- dat.regions.ages$Nb_tests_PCR_TA_crible - dat.regions.ages$Nb_susp_501Y_V2_3
# All other PCRs with a result (removing NAs)
dat.regions.ages$notV23.narm <- dat.regions.ages$Nb_susp_501Y_V1 + dat.regions.ages$Nb_susp_ABS

# GLM
# Assuming that all IND (indetermine) are non-V23
mdl <- glm(cbind(V23, notV23) ~ date2 + cl_age90 + reg_name.fac, data = dat.regions.ages, family = "binomial")
summary(mdl)
## 
## Call:
## glm(formula = cbind(V23, notV23) ~ date2 + cl_age90 + reg_name.fac, 
##     family = "binomial", data = dat.regions.ages)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -6.9544  -0.8782  -0.2063   0.4952   5.6473  
## 
## Coefficients:
##                                          Estimate Std. Error z value Pr(>|z|)
## (Intercept)                            -1.596e+02  1.446e+01 -11.031  < 2e-16
## date2                                   8.370e-03  7.741e-04  10.812  < 2e-16
## cl_age90                               -3.097e-03  1.721e-04 -17.999  < 2e-16
## reg_name.facBourgogne-Franche-Comté     3.777e-02  2.568e-02   1.471   0.1413
## reg_name.facBretagne                    4.743e-02  2.735e-02   1.734   0.0829
## reg_name.facCentre-Val de Loire        -8.335e-01  3.952e-02 -21.090  < 2e-16
## reg_name.facCorse                      -1.746e+00  2.305e-01  -7.573 3.64e-14
## reg_name.facGrand Est                   1.916e+00  1.458e-02 131.439  < 2e-16
## reg_name.facGuadeloupe                 -9.810e-01  2.314e-01  -4.240 2.23e-05
## reg_name.facGuyane                     -1.376e+01  2.031e+02  -0.068   0.9460
## reg_name.facHauts-de-France            -4.867e-01  1.861e-02 -26.155  < 2e-16
## reg_name.facÃŽle-de-France               3.848e-01  1.465e-02  26.260  < 2e-16
## reg_name.facLa Réunion                  2.896e+00  2.634e-02 109.970  < 2e-16
## reg_name.facMartinique                 -1.467e+00  3.350e-01  -4.381 1.18e-05
## reg_name.facMayotte                     3.424e+00  3.539e-02  96.767  < 2e-16
## reg_name.facNormandie                  -1.435e-01  2.603e-02  -5.512 3.55e-08
## reg_name.facNouvelle-Aquitaine         -1.374e-01  2.454e-02  -5.600 2.14e-08
## reg_name.facOccitanie                  -9.799e-01  2.816e-02 -34.800  < 2e-16
## reg_name.facPays de la Loire            5.926e-01  1.984e-02  29.875  < 2e-16
## reg_name.facProvence-Alpes-Côte d'Azur -1.851e-01  1.855e-02  -9.981  < 2e-16
##                                           
## (Intercept)                            ***
## date2                                  ***
## cl_age90                               ***
## reg_name.facBourgogne-Franche-Comté       
## reg_name.facBretagne                   .  
## reg_name.facCentre-Val de Loire        ***
## reg_name.facCorse                      ***
## reg_name.facGrand Est                  ***
## reg_name.facGuadeloupe                 ***
## reg_name.facGuyane                        
## reg_name.facHauts-de-France            ***
## reg_name.facÃŽle-de-France              ***
## reg_name.facLa Réunion                 ***
## reg_name.facMartinique                 ***
## reg_name.facMayotte                    ***
## reg_name.facNormandie                  ***
## reg_name.facNouvelle-Aquitaine         ***
## reg_name.facOccitanie                  ***
## reg_name.facPays de la Loire           ***
## reg_name.facProvence-Alpes-Côte d'Azur ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 72740.2  on 2874  degrees of freedom
## Residual deviance:  5582.3  on 2855  degrees of freedom
##   (510 observations deleted due to missingness)
## AIC: 15796
## 
## Number of Fisher Scoring iterations: 15
# Ignoring IND
mdl.narm <- glm(cbind(V23, notV23.narm) ~ date2 + cl_age90 + reg_name.fac, data = dat.regions.ages, family = "binomial")
summary(mdl.narm)
## 
## Call:
## glm(formula = cbind(V23, notV23.narm) ~ date2 + cl_age90 + reg_name.fac, 
##     family = "binomial", data = dat.regions.ages)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -7.3471  -0.8218  -0.2075   0.5353   5.6238  
## 
## Coefficients:
##                                          Estimate Std. Error z value Pr(>|z|)
## (Intercept)                            -1.603e+02  1.458e+01 -10.994  < 2e-16
## date2                                   8.414e-03  7.804e-04  10.781  < 2e-16
## cl_age90                               -3.061e-03  1.740e-04 -17.595  < 2e-16
## reg_name.facBourgogne-Franche-Comté     2.621e-02  2.570e-02   1.020    0.308
## reg_name.facBretagne                    3.726e-02  2.738e-02   1.361    0.174
## reg_name.facCentre-Val de Loire        -7.882e-01  3.956e-02 -19.924  < 2e-16
## reg_name.facCorse                      -1.708e+00  2.306e-01  -7.408 1.28e-13
## reg_name.facGrand Est                   1.964e+00  1.464e-02 134.195  < 2e-16
## reg_name.facGuadeloupe                 -1.031e+00  2.314e-01  -4.458 8.29e-06
## reg_name.facGuyane                     -1.387e+01  2.242e+02  -0.062    0.951
## reg_name.facHauts-de-France            -4.660e-01  1.863e-02 -25.010  < 2e-16
## reg_name.facÃŽle-de-France               4.362e-01  1.468e-02  29.721  < 2e-16
## reg_name.facLa Réunion                  3.144e+00  2.778e-02 113.176  < 2e-16
## reg_name.facMartinique                 -1.505e+00  3.350e-01  -4.491 7.09e-06
## reg_name.facMayotte                     3.582e+00  3.730e-02  96.021  < 2e-16
## reg_name.facNormandie                  -1.109e-01  2.608e-02  -4.252 2.12e-05
## reg_name.facNouvelle-Aquitaine         -1.353e-01  2.457e-02  -5.505 3.69e-08
## reg_name.facOccitanie                  -9.703e-01  2.818e-02 -34.435  < 2e-16
## reg_name.facPays de la Loire            5.891e-01  1.987e-02  29.647  < 2e-16
## reg_name.facProvence-Alpes-Côte d'Azur -1.587e-01  1.857e-02  -8.547  < 2e-16
##                                           
## (Intercept)                            ***
## date2                                  ***
## cl_age90                               ***
## reg_name.facBourgogne-Franche-Comté       
## reg_name.facBretagne                      
## reg_name.facCentre-Val de Loire        ***
## reg_name.facCorse                      ***
## reg_name.facGrand Est                  ***
## reg_name.facGuadeloupe                 ***
## reg_name.facGuyane                        
## reg_name.facHauts-de-France            ***
## reg_name.facÃŽle-de-France              ***
## reg_name.facLa Réunion                 ***
## reg_name.facMartinique                 ***
## reg_name.facMayotte                    ***
## reg_name.facNormandie                  ***
## reg_name.facNouvelle-Aquitaine         ***
## reg_name.facOccitanie                  ***
## reg_name.facPays de la Loire           ***
## reg_name.facProvence-Alpes-Côte d'Azur ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 74449.0  on 2844  degrees of freedom
## Residual deviance:  5479.8  on 2825  degrees of freedom
##   (510 observations deleted due to missingness)
## AIC: 15614
## 
## Number of Fisher Scoring iterations: 15

Départements

URL <- "https://www.data.gouv.fr/fr/datasets/r/16f4fd03-797f-4616-bca9-78ff212d06e8"
dataFile <- paste0("data/Dep_", today, ".csv") # name file with today's date
download.file(URL, dataFile) # download file from repo
dat.deps <- read.csv(dataFile, sep = ";", stringsAsFactors = FALSE)

# Format date
dat.deps$date1 <- as.Date(substring(dat.deps$semaine, 1, 10))
dat.deps$date2 <- as.Date(substring(dat.deps$semaine, 12, 21))
# Add name
deps <- read.csv("data/departement2020.csv", stringsAsFactors = FALSE)
# Turn into dictionnary
dps <- deps$libelle
names(dps) <- as.character(deps$dep)
dat.deps$departement <- dps[as.character(dat.deps$dep)]

V1

par(mfrow = c(35, 3))
for(idep in sort(unique(dat.deps$dep))){
  tmp <- dat.deps[dat.deps$dep == idep, ]
  plot(tmp$date2, tmp$Prc_susp_501Y_V1, ylim = c(0, 100), col = colsAge[as.character(tmp$cl_age90)], pch = pchAge[as.character(tmp$cl_age90)], 
       xlab = "date", ylab = "Proportion V1", 
       main = unique(tmp$departement))
  
#  legend(x = min(dat.deps$date2), y = 100, legend = ages, pch = pchAge, col = colsAge)
}

V2/V3

par(mfrow = c(35, 3))
for(idep in sort(unique(dat.deps$dep))){
  tmp <- dat.deps[dat.deps$dep == idep, ]
  plot(tmp$date2, tmp$Prc_susp_501Y_V2_3, ylim = c(0, 100), col = colsAge[as.character(tmp$cl_age90)], pch = pchAge[as.character(tmp$cl_age90)], 
       xlab = "date", ylab = "Proportion V2/V3", 
       main = unique(tmp$departement))
  
#  legend(x = min(dat.deps$date2), y = 100, legend = ages, pch = pchAge, col = colsAge)
}